#### Test for selection bias

## Setup environment

setwd("~/research/coffee-dataverse/src")
do.allyears <- T

## Use Nijman and Verbeek (1992) to test for selection bias

library(lfe)
library(splines)
library(ggplot2)

## Run the regressions

mods <- list()
for (do.origspec in c(F, T)) {
    source("load.R", encoding="iso-8859-1")

    df2 <- df %>% group_by(Munic�pio) %>% arrange(year) %>% mutate(lagpres=c(NA, !is.na(logyield)[-length(logyield)]))

    formula.rhs <- paste("logyield ~ ", mainspec.rhs, "| 0 | Munic�pio + year")
    mod.yield <- felm(as.formula(formula.rhs), data=df2)

    mods[[ifelse(do.origspec, 1, 4)]] <- mod.yield

    formula.rhs <- paste("logyield ~ lagpres +", mainspec.rhs, "| 0 | Munic�pio + year")
    mod.yield <- felm(as.formula(formula.rhs), data=df2)

    mods[[ifelse(do.origspec, 2, 5)]] <- mod.yield
}

## Print regression tables

library(stargazer)

stargazer(mods, add.lines=list(c('Municipality FE', rep('Yes', 4)),
                               c('Year FE', rep('Yes', 4)),
                               c('State Trends', rep('Cubic', 4))),
          dep.var.labels="Log Yields", covariate.labels=c("Lag. Presence", "Average Min.", "Average Max.", weatherlabels[-1]),
          omit=':', df=F)

